Data¶

Note that all data is fictitious!

You can find more info about the data in the tables in the data dictionary.

  1. contacts: a transaction table containing Customer Service (CS) contacts.

  2. specialists: a dimension table containing information about CS specialists.

  3. users: a dimension table containing information about N26 customers.

  4. surveys: a transaction table containing survey responses.

Task¶

Imagine you are an "Operations Data Analyst" and the Operations Extended Leadership Team has tasked you with an analysis.

1. Customer Service Overview¶

a. You'll first want to give a high-level overview of what happened in Customer Service, based on the given data. Pick a set of (at least 3) KPIs you deem appropriate and visualise their development. Hint: In this sub-task, restrict yourself to contacts, specialists, surveys.

b. As we need to communicate to Workforce (WF) Management how many resources will be needed in the future, please determine how many members of staff will be needed per day of the week for the next 7 days after the last day in the dataset. You don't need to separate external and internal specialists as this will be done by the WF Manager.

2. CS Specialists¶

Create weekly leaderboards: breakdown per week for the top 5 inhouse CS specialists with respect to 3 KPIs that you think are suitable for measuring specialist performance. Plot the leaderboards showing full names and scores.

Notes: Let's say a week starts on Monday and has 7 days. Let's only consider weeks that are entirely contained in the dataset.
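Under this definition, the fully contained weeks can be derived from the data's date range: the first complete week starts on the first Monday on or after the earliest date, and the last one ends on the last Sunday on or before the latest date. A minimal sketch with illustrative dates (not the case-study data):

```python
import pandas as pd

# Illustrative dates standing in for the dataset's contact dates
dates = pd.Series(pd.to_datetime(["2020-06-01", "2020-06-20", "2020-07-05"]))
start, end = dates.min(), dates.max()

# First Monday on/after the first date, last Sunday on/before the last date
first_monday = start + pd.Timedelta(days=(7 - start.weekday()) % 7)
last_sunday = end - pd.Timedelta(days=(end.weekday() + 1) % 7)

# Only dates inside [first_monday, last_sunday] fall in complete weeks
complete = dates[(dates >= first_monday) & (dates <= last_sunday)]
weeks = complete.dt.isocalendar().week.unique()
```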

Importing Necessary Libraries¶

In [1]:
# standard imports
import pandas as pd
import plotly.express as px

Importing Datasets into Pandas Dataframe¶

In [2]:
# Load each CSV file into its own DataFrame
# read_csv with iso-8859-1 encoding

contacts_1 = pd.read_csv('ops_case_study__contacts.csv', encoding='iso-8859-1')
specialists_1 = pd.read_csv('ops_case_study__specialists.csv', encoding='iso-8859-1')
surveys_1 = pd.read_csv('ops_case_study__surveys.csv', encoding='iso-8859-1')
users_1 = pd.read_csv('ops_case_study__users.csv', encoding='iso-8859-1')

# Work on copies of each DataFrame so the original imports stay available if needed
contacts = contacts_1.copy()
specialists = specialists_1.copy()
surveys = surveys_1.copy()
users = users_1.copy()
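The four read-and-copy pairs above can also be written as one small helper; `load_csvs` below is an illustrative name (not part of the case-study code), and the in-memory demo stands in for the actual files:

```python
import io
import pandas as pd

def load_csvs(paths, encoding='iso-8859-1'):
    """Read each named CSV once and return a {name: DataFrame} mapping."""
    return {name: pd.read_csv(path, encoding=encoding)
            for name, path in paths.items()}

# With the actual case-study files this would be:
# tables = load_csvs({
#     'contacts': 'ops_case_study__contacts.csv',
#     'specialists': 'ops_case_study__specialists.csv',
#     'surveys': 'ops_case_study__surveys.csv',
#     'users': 'ops_case_study__users.csv',
# })

# read_csv also accepts file-like objects, so a tiny in-memory demo:
demo = load_csvs({'toy': io.BytesIO(b'a,b\n1,2\n')})
```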
In [3]:
# View the shape of each DataFrame
print('contacts', contacts.shape)
print('specialists', specialists.shape)
print('surveys', surveys.shape)
print('users', users.shape)
contacts (75014, 16)
specialists (202, 4)
surveys (5032, 5)
users (42323, 8)
In [4]:
# Viewing first 5 rows
contacts.head()
Out[4]:
id cs_tag channel channel_type contact_language cst_initiated abandoned is_inhouse initiated_date contact_date product wait_time handle_time specialist_id user_id link_id
0 c53370 kyc_issue chat 1st level es True NaN True 2020-06-20 16:31:34.000000 NaN A NaN NaN s95 u41070 NaN
1 c53424 not_tagged chat 1st level fr True True True 2020-06-20 16:53:04.000000 NaN NaN 151.0 NaN s201 NaN NaN
2 c52674 not_tagged chat 1st level de True True True 2020-06-20 10:56:51.000000 NaN NaN 150.0 NaN s201 NaN NaN
3 c52689 not_tagged chat 1st level de True True True 2020-06-20 11:04:12.000000 NaN NaN 82.0 NaN s201 NaN NaN
4 c53214 not_tagged chat 1st level es True True True 2020-06-20 15:13:32.000000 NaN NaN 27.0 NaN s201 NaN NaN
In [5]:
# Viewing first 5 rows
specialists.head()
Out[5]:
id is_inhouse hired_at full_name
0 s32 False 2019-02-06 Taylor Swift
1 s64 False 2019-05-10 Jennifer Aniston
2 s96 True 2019-08-29 George Clooney
3 s128 True 2019-11-01 Robert Downey Jr.
4 s160 True 2019-12-17 Miley Cyrus
In [6]:
# Viewing first 5 rows
surveys.head()
Out[6]:
contact_id response_date agent_satisfaction csat issue_resolved
0 c53109 2020-06-21 08:02:09.000000 9 8 True
1 c52087 2020-06-21 01:15:08.000000 10 8 False
2 c52532 2020-06-21 18:33:11.000000 4 4 False
3 c53651 2020-06-21 11:35:20.000000 10 10 True
4 c52529 2020-06-21 06:37:53.000000 9 8 True
In [7]:
# Viewing first 5 rows
users.head()
Out[7]:
id signed_up_at has_activated_card closed_account_at tnc_country_group os first_name last_name
0 u15586 2019-02-28 False NaN ITA android Bruce Fuller
1 u38150 2020-05-28 False NaN DEU android Mitchell Coleman
2 u11771 2018-11-01 True NaN FRA iOS Julian Thornton
3 u94 2015-04-29 True NaN DEU iOS Stella Norton
4 u36971 2020-05-23 False 2020-06-09 FRA iOS Lois Lee

Combining Datasets¶

Merging Specialists and Contacts¶

In [8]:
# Merging Contacts to Specialists
combined2 = contacts.merge(specialists, how='inner', left_on= 'specialist_id', right_on='id')
combined2.shape
Out[8]:
(75014, 20)
In [9]:
# Viewing First 5 rows of combined dataset
combined2.head()
Out[9]:
id_x cs_tag channel channel_type contact_language cst_initiated abandoned is_inhouse_x initiated_date contact_date product wait_time handle_time specialist_id user_id link_id id_y is_inhouse_y hired_at full_name
0 c53370 kyc_issue chat 1st level es True NaN True 2020-06-20 16:31:34.000000 NaN A NaN NaN s95 u41070 NaN s95 True 2019-08-29 Kourtney Kardashian
1 c53707 not_tagged call callback NaN False False True 2020-06-20 20:17:15.000000 2020-06-20 20:17:16.000000 NaN 1.0 99.0 s95 NaN NaN s95 True 2019-08-29 Kourtney Kardashian
2 c53718 not_tagged call callback NaN False False True 2020-06-20 20:27:12.000000 2020-06-20 20:27:13.000000 NaN 1.0 181.0 s95 NaN NaN s95 True 2019-08-29 Kourtney Kardashian
3 c53369 not_tagged chat 1st level es True False True 2020-06-20 16:31:34.000000 2020-06-20 16:32:20.000000 A 46.0 687.0 s95 u41070 NaN s95 True 2019-08-29 Kourtney Kardashian
4 c53488 kyc_issue chat 1st level es True False True 2020-06-20 17:14:40.000000 2020-06-20 17:17:03.000000 NaN 143.0 440.0 s95 NaN NaN s95 True 2019-08-29 Kourtney Kardashian
In [10]:
# Dropping Duplicate Columns
combined2 = combined2.drop(['is_inhouse_y','id_y'], axis = 1)
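Passing `suffixes` to `merge` keeps the contact columns unsuffixed, which avoids the `_x`/`_y` renaming that had to be cleaned up above. A sketch on toy frames (not the case-study data):

```python
import pandas as pd

# Toy stand-ins for the contacts and specialists tables
contacts_toy = pd.DataFrame({'id': ['c1', 'c2'],
                             'is_inhouse': [True, False],
                             'specialist_id': ['s1', 's1']})
specialists_toy = pd.DataFrame({'id': ['s1'],
                                'is_inhouse': [True],
                                'full_name': ['Ada Lovelace']})

# Left columns keep their plain names; only the right-hand duplicates get '_sp'
merged = contacts_toy.merge(specialists_toy, how='inner',
                            left_on='specialist_id', right_on='id',
                            suffixes=('', '_sp'))
merged = merged.drop(columns=['id_sp', 'is_inhouse_sp'])
```

Downstream code can then keep referring to `id` instead of `id_x`.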
In [11]:
# Viewing First 5 rows of the dataset
combined2.head()
Out[11]:
id_x cs_tag channel channel_type contact_language cst_initiated abandoned is_inhouse_x initiated_date contact_date product wait_time handle_time specialist_id user_id link_id hired_at full_name
0 c53370 kyc_issue chat 1st level es True NaN True 2020-06-20 16:31:34.000000 NaN A NaN NaN s95 u41070 NaN 2019-08-29 Kourtney Kardashian
1 c53707 not_tagged call callback NaN False False True 2020-06-20 20:17:15.000000 2020-06-20 20:17:16.000000 NaN 1.0 99.0 s95 NaN NaN 2019-08-29 Kourtney Kardashian
2 c53718 not_tagged call callback NaN False False True 2020-06-20 20:27:12.000000 2020-06-20 20:27:13.000000 NaN 1.0 181.0 s95 NaN NaN 2019-08-29 Kourtney Kardashian
3 c53369 not_tagged chat 1st level es True False True 2020-06-20 16:31:34.000000 2020-06-20 16:32:20.000000 A 46.0 687.0 s95 u41070 NaN 2019-08-29 Kourtney Kardashian
4 c53488 kyc_issue chat 1st level es True False True 2020-06-20 17:14:40.000000 2020-06-20 17:17:03.000000 NaN 143.0 440.0 s95 NaN NaN 2019-08-29 Kourtney Kardashian

Merging Combined2 Dataset with Surveys Dataset¶

In [12]:
# Merging datasets
combined3 = combined2.merge(surveys, how='outer', left_on= 'id_x', right_on='contact_id')
In [13]:
# Viewing rows and columns
combined3.shape
Out[13]:
(75014, 23)
In [14]:
# Viewing first 5 rows
combined3.head()
Out[14]:
id_x cs_tag channel channel_type contact_language cst_initiated abandoned is_inhouse_x initiated_date contact_date ... specialist_id user_id link_id hired_at full_name contact_id response_date agent_satisfaction csat issue_resolved
0 c53370 kyc_issue chat 1st level es True NaN True 2020-06-20 16:31:34.000000 NaN ... s95 u41070 NaN 2019-08-29 Kourtney Kardashian NaN NaN NaN NaN NaN
1 c53707 not_tagged call callback NaN False False True 2020-06-20 20:17:15.000000 2020-06-20 20:17:16.000000 ... s95 NaN NaN 2019-08-29 Kourtney Kardashian NaN NaN NaN NaN NaN
2 c53718 not_tagged call callback NaN False False True 2020-06-20 20:27:12.000000 2020-06-20 20:27:13.000000 ... s95 NaN NaN 2019-08-29 Kourtney Kardashian NaN NaN NaN NaN NaN
3 c53369 not_tagged chat 1st level es True False True 2020-06-20 16:31:34.000000 2020-06-20 16:32:20.000000 ... s95 u41070 NaN 2019-08-29 Kourtney Kardashian NaN NaN NaN NaN NaN
4 c53488 kyc_issue chat 1st level es True False True 2020-06-20 17:14:40.000000 2020-06-20 17:17:03.000000 ... s95 NaN NaN 2019-08-29 Kourtney Kardashian NaN NaN NaN NaN NaN

5 rows × 23 columns

In [15]:
# Dropping duplicate columns
combined3 = combined3.drop(['contact_id'], axis = 1)
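When auditing an outer merge like this one, `indicator=True` reports how many rows matched on each side, which is a quick sanity check that every survey found its contact. A toy sketch:

```python
import pandas as pd

left = pd.DataFrame({'id_x': ['c1', 'c2', 'c3']})
right = pd.DataFrame({'contact_id': ['c2'], 'csat': [9]})

# The added _merge column labels each row 'left_only', 'right_only', or 'both'
audit = left.merge(right, how='outer', left_on='id_x',
                   right_on='contact_id', indicator=True)
match_counts = audit['_merge'].value_counts()
```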

Merging Combined3 with Users Dataset¶

In [16]:
# Merging
combined4 = combined3.merge(users, how='left', left_on= 'user_id', right_on='id')
In [17]:
# First 5 rows
combined4.head()
Out[17]:
id_x cs_tag channel channel_type contact_language cst_initiated abandoned is_inhouse_x initiated_date contact_date ... csat issue_resolved id signed_up_at has_activated_card closed_account_at tnc_country_group os first_name last_name
0 c53370 kyc_issue chat 1st level es True NaN True 2020-06-20 16:31:34.000000 NaN ... NaN NaN u41070 2020-06-10 False NaN ESP android Maude Hayes
1 c53707 not_tagged call callback NaN False False True 2020-06-20 20:17:15.000000 2020-06-20 20:17:16.000000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 c53718 not_tagged call callback NaN False False True 2020-06-20 20:27:12.000000 2020-06-20 20:27:13.000000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 c53369 not_tagged chat 1st level es True False True 2020-06-20 16:31:34.000000 2020-06-20 16:32:20.000000 ... NaN NaN u41070 2020-06-10 False NaN ESP android Maude Hayes
4 c53488 kyc_issue chat 1st level es True False True 2020-06-20 17:14:40.000000 2020-06-20 17:17:03.000000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 30 columns

In [18]:
# Rows and Columns
combined4.shape
Out[18]:
(75014, 30)
In [19]:
# Dropping duplicate columns
combined4 = combined4.drop(['id'], axis = 1)
In [20]:
# Viewing first 5 rows of the dataset
combined4.head()
Out[20]:
id_x cs_tag channel channel_type contact_language cst_initiated abandoned is_inhouse_x initiated_date contact_date ... agent_satisfaction csat issue_resolved signed_up_at has_activated_card closed_account_at tnc_country_group os first_name last_name
0 c53370 kyc_issue chat 1st level es True NaN True 2020-06-20 16:31:34.000000 NaN ... NaN NaN NaN 2020-06-10 False NaN ESP android Maude Hayes
1 c53707 not_tagged call callback NaN False False True 2020-06-20 20:17:15.000000 2020-06-20 20:17:16.000000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 c53718 not_tagged call callback NaN False False True 2020-06-20 20:27:12.000000 2020-06-20 20:27:13.000000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 c53369 not_tagged chat 1st level es True False True 2020-06-20 16:31:34.000000 2020-06-20 16:32:20.000000 ... NaN NaN NaN 2020-06-10 False NaN ESP android Maude Hayes
4 c53488 kyc_issue chat 1st level es True False True 2020-06-20 17:14:40.000000 2020-06-20 17:17:03.000000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 29 columns

In [21]:
# Converting to csv file
combined4.to_csv('Final_Merged.csv')
In [22]:
# Checking for duplicate data
check_duplicate = combined4['id_x'].duplicated().sum()       
check_duplicate
Out[22]:
0

Task 1A (Overview)¶

1. HeadCount¶

In [23]:
# Finding Number of Available Specialists
headcount = specialists['id'].dropna().count()
print('Total Number of Specialists =', headcount)
Total Number of Specialists = 202

2. Total Contacts Handled¶

In [24]:
# Counting contacts (chat and call) with a recorded handle time
totalcalls = combined2['handle_time'].dropna().count()
print('Total Contacts Handled by Specialists =', totalcalls)
Total Contacts Handled by Specialists = 66053

3. Avg Response Time¶

In [25]:
# Finding the average wait time before a customer is answered
avg_response_time = combined2['wait_time'].dropna().mean()
print('Average Response/Wait Time = %.2f minutes' % (avg_response_time/60))
Average Response/Wait Time = 0.92 minutes

4. Avg Call Time¶

In [26]:
# Finding the average time taken to handle a customer
avg_call_time = combined2['handle_time'].dropna().mean()
print('Average Call/Handle Time = %.2f minutes' % (avg_call_time/60))
Average Call/Handle Time = 11.91 minutes

5. Channel Wise¶

In [27]:
# Counting contacts handled by specialists, per channel
Channelwise_Customers = combined3[combined3.channel.notnull()]
Channelwise_Customers = Channelwise_Customers.groupby('channel', as_index = False)['id_x'].count()
Channelwise_Customers
Out[27]:
channel id_x
0 call 7900
1 chat 67114
In [28]:
# Plotting the chart
fig = px.bar(Channelwise_Customers, x="channel", y="id_x", color="channel", title="Number of Customers Dealt by Specialists Channelwise")
fig.show()
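The filter-then-groupby pattern used here (and again for the language, country, and OS breakdowns below) can be shortened with `value_counts`, which drops NaNs by default. A toy sketch:

```python
import pandas as pd

# Toy stand-in for combined3['channel'], including a missing value
channel = pd.Series(['chat', 'call', 'chat', None, 'chat'])

# value_counts skips NaN; rename/reset rebuilds the two-column layout above
counts = (channel.value_counts()
          .rename_axis('channel')
          .reset_index(name='id_x'))
```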

6. Performance Wise¶

In [29]:
# Creating subdata set
top_performers = combined3[['specialist_id','full_name','csat']]
In [30]:
# Filtering the dataset
top_performers = top_performers[top_performers.csat.notnull()]
top_performers = top_performers.groupby('full_name',as_index=False)['csat'].mean()
In [31]:
# Sorting and extracting the best and worst 5 performers
top5_performers = top_performers.sort_values(by = 'csat', ascending = False).head()
worst5_performers = top_performers.sort_values(by = 'csat', ascending = True).head()
In [32]:
# Plotting the chart
fig = px.bar(top5_performers, x="full_name", y="csat", color="full_name", title="Top 5 Agents with Highest Average Rating")
fig.show()
In [33]:
# Plotting the chart
fig = px.bar(worst5_performers, x="full_name", y="csat", color="full_name", title="5 Agents with Lowest Average Rating")
fig.show()

7. Language Wise¶

In [34]:
# Filtering dataset
Languagewise_Customers = combined3[combined3.contact_language.notnull()]
Languagewise_Customers = Languagewise_Customers.groupby('contact_language', as_index = False)['id_x'].count()
Languagewise_Customers
Out[34]:
contact_language id_x
0 de 10664
1 en 17008
2 es 5428
3 fr 27882
4 it 7748
In [35]:
# Plotting the chart
fig = px.bar(Languagewise_Customers, x="contact_language", y="id_x", color="contact_language", title="Language of Customers Dealt by Specialists")
fig.show()

8. Country Wise¶

In [36]:
# Filtering the dataset
Country_Customers = combined4[combined4.tnc_country_group.notnull()] # Removing missing rows
Country_Customers = Country_Customers.groupby('tnc_country_group', as_index = False)['id_x'].count() # Counting Customers
Country_Customers
Out[36]:
tnc_country_group id_x
0 AUT 1303
1 DEU 13573
2 ESP 4050
3 FRA 24497
4 GBR 153
5 GrE 4864
6 ITA 7049
7 NEuro 868
In [37]:
# Plotting the chart
fig = px.bar(Country_Customers, x="tnc_country_group", y="id_x", color="tnc_country_group", title="Country of Customers Dealt by Specialists")
fig.show()

9. OS Wise¶

In [38]:
# Filtering the Dataset
OS_Customers = combined4[combined4.os.notnull()]
OS_Customers = OS_Customers.groupby('os', as_index = False)['id_x'].count()
OS_Customers
Out[38]:
os id_x
0 android 27568
1 iOS 28789
In [39]:
# Plotting the chart
fig = px.bar(OS_Customers, x="os", y="id_x", color="os", title="OS of Customers Dealt by Specialists")
fig.show()

10. Call Resolution¶

In [40]:
# Filtering the dataset
resolved = combined3[combined3.issue_resolved.notnull()]
resolved = resolved[resolved.issue_resolved == True]
resolved = resolved['issue_resolved'].count()
not_resolved = combined3[combined3.issue_resolved.notnull()]
not_resolved = not_resolved[not_resolved.issue_resolved == False]
not_resolved = not_resolved['issue_resolved'].count()
In [41]:
# Calculating Resolution Percentage
call_resolution = resolved/(resolved+not_resolved)*100
print("Call Resolution Percentage = %.2f percent" % call_resolution)
Call Resolution Percentage = 70.09 percent
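Because `issue_resolved` is boolean, the same percentage falls out of a single `mean` over the non-null rows, without the separate resolved/not-resolved counts. A toy sketch:

```python
import pandas as pd

# Toy stand-in for combined3['issue_resolved'], including a missing value
issue_resolved = pd.Series([True, True, False, None, True])

# Mean of booleans = share of True values; dropna excludes unanswered surveys
resolution_pct = issue_resolved.dropna().mean() * 100
```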

Task 1B (Predicting Number of Workforce)¶

In [42]:
# Number of unique contacts (each row is one customer interaction)
customer_count = contacts['id'].dropna().nunique()
customer_count
Out[42]:
75014
In [43]:
# Approximating call frequency via the mean day-of-month of contact initiation
contacts['initiated_date'] = pd.to_datetime(contacts['initiated_date'])
contacts['initiated_day'] = contacts.initiated_date.dt.day
frequency_of_call = contacts['initiated_day'].mean()
frequency_of_call
Out[43]:
15.540965686405205
In [44]:
# Average call time in minutes
avg_call_time = avg_call_time/60
avg_call_time
Out[44]:
11.906010577364642
In [45]:
# Calculating Workforce Required perday assuming each specialist works 480 minutes a day
Wokforce = (customer_count*frequency_of_call*avg_call_time)/(480*headcount)
Workforce = round(Wokforce,0)
print('Workforce Required Per day of Week =',Workforce)
Workforce Required Per day of Week = 143.0
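The estimate above is a single overall number, while the task asks for staffing per day of the week. One way to get there is to sum the handle time per calendar day, average those totals per weekday, and divide by a 480-minute shift. The sketch below uses synthetic data so it stands alone; with the real data, `hist` would be the `contacts` rows that have a non-null `handle_time`:

```python
import numpy as np
import pandas as pd

# Synthetic history: 28 days, 50 contacts/day, handle times in seconds
rng = np.random.default_rng(0)
dates = pd.date_range('2020-06-01', periods=28, freq='D').repeat(50)
hist = pd.DataFrame({
    'initiated_date': dates,
    'handle_time': rng.integers(300, 900, len(dates)),
})

# Total handled seconds per calendar day, then averaged per weekday
per_day = hist.groupby(hist['initiated_date'].dt.normalize())['handle_time'].sum()
avg_minutes = per_day.groupby(per_day.index.day_name()).mean() / 60

# Round up: a fraction of a shift still needs a whole specialist
staff_needed = np.ceil(avg_minutes / 480).astype(int)
```

The result is one headcount per weekday name, which maps directly onto the 7-day forecast window.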
In [46]:
# Viewing Number of customers channelwise
Channelwise_Customers
Out[46]:
channel id_x
0 call 7900
1 chat 67114
In [47]:
# Calculating each channel's share of all contacts
call_percentage = 7900/customer_count*100
chat_percentage = 67114/customer_count*100
In [48]:
# Estimating the workforce required for each channel
Workforce_chat = round((chat_percentage*Workforce)/100,0)
Workforce_call = round((call_percentage*Workforce)/100,0)
In [49]:
# Printing Chat Workforce Requirement
print('Chat Workforce Required =', Workforce_chat)
Chat Workforce Required = 128.0
In [50]:
# Printing Call Workforce Requirement
print('Call Workforce Required =', Workforce_call)
Call Workforce Required = 15.0

Task 2 (Creating Weekly Leaderboards According to KPI Performance)¶

In [51]:
# Creating a sub dataset for Task2
task2 = combined3[['response_date','specialist_id','full_name','is_inhouse_x','csat','issue_resolved','wait_time']]
In [52]:
task2 = task2[task2['is_inhouse_x'].notna()] # Removing rows where is_inhouse is missing
task2 = task2[task2['is_inhouse_x'] == True] # Keeping only in-house specialists
task2 = task2[task2['response_date'].notna()] # Removing rows where response_date is missing
task2 = task2[task2.issue_resolved.notnull()] # Removing rows where issue_resolved is missing
task2 = task2[task2.issue_resolved == True] # Keeping rows where the customer issue was resolved
task2.reset_index(inplace = True) # Resetting the index
task2 = task2.groupby(['response_date','full_name'], as_index=False).mean() # Averaging numeric columns per specialist per day
task2['response_date'] = pd.to_datetime(task2['response_date']) # Converting the column to datetime
task2['week'] = task2.response_date.dt.isocalendar().week # Extracting the ISO week number
task2 = task2.groupby(['week','full_name'],as_index=False)[['csat','wait_time']].mean() # Averaging per week and specialist
In [53]:
# Viewing First 5 Rows
task2.head()
Out[53]:
week full_name csat wait_time
0 23 Alec Baldwin 9.250000 42.00
1 23 Alyson Hannigan 9.250000 11.75
2 23 Amber Heard 9.000000 53.50
3 23 Ashton Kutcher 9.500000 9.50
4 23 Barack Obama 9.090909 99.00

Plotting Weekly Leaderboard Graphs Based on CSAT, Wait Time and Issue Resolution¶

In [54]:
# Plotting week 23 Leaderboard
week23_leader = task2[task2.week == 23].sort_values(['csat', 'wait_time'], ascending=[False, True]).head()
fig = px.bar(week23_leader, x="full_name", y="csat", color="full_name", title="Top 5 Agents Week 23")
fig.show()
In [55]:
# Plotting week 24 Leaderboard
week24_leader = task2[task2.week == 24].sort_values(['csat', 'wait_time'], ascending=[False, True]).head()
fig = px.bar(week24_leader, x="full_name", y="csat", color="full_name", title="Top 5 Agents Week 24")
fig.show()
In [56]:
# Plotting week 25 Leaderboard
week25_leader = task2[task2.week == 25].sort_values(['csat', 'wait_time'], ascending=[False, True]).head()
fig = px.bar(week25_leader, x="full_name", y="csat", color="full_name", title="Top 5 Agents Week 25")
fig.show()
In [57]:
# Plotting week 26 Leaderboard
week26_leader = task2[task2.week == 26].sort_values(['csat', 'wait_time'], ascending=[False, True]).head()
fig = px.bar(week26_leader, x="full_name", y="csat", color="full_name", title="Top 5 Agents Week 26")
fig.show()
In [58]:
# Plotting week 27 Leaderboard
week27_leader = task2[task2.week == 27].sort_values(['csat', 'wait_time'], ascending=[False, True]).head()
fig = px.bar(week27_leader, x="full_name", y="csat", color="full_name", title="Top 5 Agents Week 27")
fig.show()
In [59]:
# Plotting week 28 Leaderboard
week28_leader = task2[task2.week == 28].sort_values(['csat', 'wait_time'], ascending=[False, True]).head()
fig = px.bar(week28_leader, x="full_name", y="csat", color="full_name", title="Top 5 Agents Week 28")
fig.show()
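The six per-week cells above repeat the same sort-and-head logic; they can be collapsed into a single sort followed by a grouped `head`. A sketch on a toy frame shaped like `task2`:

```python
import pandas as pd

# Toy stand-in for the weekly task2 table
task2_toy = pd.DataFrame({
    'week': [23, 23, 23, 24, 24],
    'full_name': ['A', 'B', 'C', 'A', 'B'],
    'csat': [9.5, 8.0, 9.0, 7.0, 9.9],
    'wait_time': [10, 5, 20, 15, 8],
})

# Sort once (best csat first, ties broken by lower wait), then take the
# top 5 rows within each week
leaders = (task2_toy
           .sort_values(['week', 'csat', 'wait_time'],
                        ascending=[True, False, True])
           .groupby('week')
           .head(5))
```

With plotly, something like `px.bar(leaders, x='full_name', y='csat', color='full_name', facet_col='week')` would then render every weekly leaderboard in one faceted figure instead of one cell per week.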